Load the required libraries.
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(here)
here() starts at /Users/petermacpherson/Documents/Documents - Peter’s Mac mini/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr
library(readxl)
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
library(DT)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(brms)
Loading required package: Rcpp
Loading 'brms' package (version 2.21.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').
Attaching package: ‘brms’
The following object is masked from ‘package:stats’:
ar
library(tidybayes)
Attaching package: ‘tidybayes’
The following objects are masked from ‘package:brms’:
dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(patchwork)
library(marginaleffects)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(ggrepel)
library(scico)
library(ggdensity)
library(ggpubr)
library(units)
udunits database from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/units/share/udunits/udunits2.xml
library(glue)
library(ggh4x)
Attaching package: ‘ggh4x’
The following object is masked from ‘package:ggplot2’:
guide_axis_logticks
Functions that we will use throughout the script
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
Function for counterfactual plots
plot_counterfactual <- function(model_data, model, population_denominator, outcome, grouping_var=NULL, re_formula,...){
#labeller for years
year_labels <- c(1950:1964) #extra year for the extant of the x-axis
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
summary <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{outcome}}, {{grouping_var}}) %>%
add_epred_draws({{model}}, re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, {{outcome}}) %>%
mutate(acf_period = "a. pre-acf"), re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#plot the intervention effect
p <- summary %>%
droplevels() %>%
ggplot() +
geom_vline(aes(xintercept=acf_start, linetype="Mass CXR screening intervention")) +
geom_vline(aes(xintercept=acf_end, linetype="Mass CXR screening intervention")) +
geom_ribbon(aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = counterfact %>% filter(year>=1956),
aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = counterfact %>% filter(year>=1956),
aes(y=.epred_inc, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred_inc, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = {{model_data}} %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Pre-ACF",
acf_period=="b. acf" ~ "ACF",
acf_period=="c. post-acf" ~ "Post-ACF")),
aes(y={{outcome}}, x=year2, shape=fct_relevel(acf_period,
"Pre-ACF",
"ACF",
"Post-ACF")), size=2) +
theme_grey() +
scale_y_continuous(labels=comma, limits =c(0,400)) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:", na.translate = F) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:", na.translate = F) +
scale_shape_discrete(name="Empirical data (period):", na.translate = F) +
scale_linetype_manual(values = 2, name="") +
labs(
x = "",
y = "CNR (per 100,000)"
) +
guides(x = "axis_truncated", y = "axis_truncated") +
theme(legend.position = "bottom",
legend.box="vertical",
text = element_text(size=10),
axis.text.x = element_text(size=10, angle = 90, hjust=1, vjust=0.5),
legend.text = element_text(size=10),
legend.spacing.y = unit(0.1, 'cm'),
axis.line = element_line(colour = "black"))
facet_vars <- vars(...)
if (length(facet_vars) != 0) {
p <- p + facet_wrap(facet_vars)
}
p
}
Function for calculating measures of change over time (RR.peak, RR.level, RR.slope)
summarise_change <- function(model_data, model, population_denominator, grouping_var = NULL, re_formula = NULL) {
#functions for calculating RR.peak
#i.e. relative case notification rate in 1957 vs. counterfactual trend for 1957
grouping_var <- enquo(grouping_var)
if (!is.null({{grouping_var}})) {
#make the prediction matrix, conditional on whether we want random effects included or not.
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
} else {
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
}
peak_draws <- add_epred_draws(newdata = out,
object = {{model}},
re_formula = {{re_formula}}) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
peak_summary <- peak_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#functions for calculating RR.level
#i.e. relative case notification rate in 1958 vs. counterfactual trend for 1958
if (!is.null({{grouping_var}})) {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
level_draws <- add_epred_draws(newdata = out2,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num, .draw) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.level")
level_summary <- level_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#functions for calculating RR.slope
#i.e. relative change in case notification rate in 1958-1963 vs. counterfactual trend for 1959-1963
if (!is.null({{grouping_var}})) {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
slope_draws <- add_epred_draws(newdata = out3,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num) %>%
ungroup() %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, y_num, !!grouping_var) %>%
summarise(slope = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
slope_summary <- slope_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.slope")
#gather all the results into a named list
lst(peak_draws=peak_draws, peak_summary=peak_summary,
level_draws=level_draws, level_summary=level_summary,
slope_draws=slope_draws, slope_summary=slope_summary)
}
Function for calculating difference from counterfactual
calculate_counterfactual <- function(model_data, model, population_denominator, grouping_var=NULL, re_formula=NA){
#effect vs. counterfactual
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc_counterf = .epred/{{population_denominator}}*100000, .epred_counterf=.epred) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, .draw, .epred_counterf, .epred_inc_counterf, {{grouping_var}})
#Calcuate case notification rate per draw, then summarise.
post_change <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, {{grouping_var}}, .draw, .epred, .epred_inc, {{grouping_var}})
#for the overall period
counterfact_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, .draw, .epred, {{grouping_var}}) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred_counterf = sum(.epred))
#Calcuate case notification rate per draw, then summarise.
post_change_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, {{grouping_var}}, .draw, .epred) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred = sum(.epred))
counter_post <-
left_join(counterfact, post_change) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf,
diff_inc100k = .epred_inc - .epred_inc_counterf,
rr_inc100k = .epred_inc/.epred_inc_counterf) %>%
group_by(year, {{grouping_var}}) %>%
mean_qi(cases_averted, pct_change, diff_inc100k, rr_inc100k) %>%
ungroup()
counter_post_overall <-
left_join(counterfact_overall, post_change_overall) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by({{grouping_var}}) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup()
lst(counter_post, counter_post_overall)
}
Function for tidying up counterfactuals (mostly for making nice tables)
tidy_counterfactuals <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"),
diff_inc = glue::glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr_inc = glue::glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"))
}
tidy_counterfactuals_overall <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"))
}
Import datasets for analysis
Make a map of Glasgow wards
glasgow_wards_1951 <- st_read(here("mapping/glasgow_wards_1951.geojson"))
Reading layer `glasgow_wards_1951' from data source
`/Users/petermacpherson/Documents/Documents - Peter’s Mac mini/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/glasgow_wards_1951.geojson'
using driver `GeoJSON'
Simple feature collection with 37 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -4.393502 ymin: 55.77464 xmax: -4.070411 ymax: 55.92814
Geodetic CRS: WGS 84
#read in Scotland boundary
scotland <- st_read(here("mapping/Scotland_boundary/Scotland boundary.shp"))
Reading layer `Scotland boundary' from data source
`/Users/petermacpherson/Documents/Documents - Peter’s Mac mini/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/Scotland_boundary/Scotland boundary.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5513 ymin: 530249 xmax: 470332 ymax: 1220302
Projected CRS: OSGB36 / British National Grid
#make a bounding box for Glasgow
bbox <- st_bbox(glasgow_wards_1951) |> st_as_sfc()
#plot scotland with a bounding box around the City of Glasgow
scotland_with_bbox <- ggplot() +
geom_sf(data = scotland, fill="antiquewhite") +
geom_sf(data = bbox, colour = "#C60C30", fill="antiquewhite") +
theme_void() +
theme(panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "#EAF7FA", size = 0.3))
Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#plot the wards
#note we tidy up some names to fit on map
glasgow_ward_map <- glasgow_wards_1951 %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
ggplot() +
geom_sf(aes(fill=division)) +
geom_sf_label(aes(label = ward), size=3, fill=NA, label.size = NA, colour="black") +
#scale_colour_identity() +
scale_fill_brewer(palette = "Set3", name="City of Glasgow Division") +
theme_grey() +
labs(x="",
y="",
fill="Division") +
theme(legend.position = "top",
panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "antiquewhite", size = 0.3),
panel.grid.major = element_line(color = "grey78")) +
guides(fill=guide_legend(title.position = "top", title.hjust = 0.5, title.theme = element_text(face="bold")))
#add the map of scotland as an inset
glasgow_ward_map + inset_element(scotland_with_bbox, 0.75, 0, 1, 0.4)
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
ggsave(here("figures/s1.tiff"), height=10, width = 12)
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Load in the datasets for denonomiators, and check for consistency.
overall_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "overall_population")
overall_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
overall_pops <- overall_pops %>%
mutate(year2 = year+0.5)
Note, we have three population estimates:
(Population in shipping is estimated from the 1951 census, so is the same for most years)
First, plot the total population
overall_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2), alpha=0.5, colour = "mediumseagreen", fill="mediumseagreen") +
geom_point(aes(y=total_population, x=year2), colour = "mediumseagreen") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: total population",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
Now the population excluding institutionalised and shipping population
overall_pops %>%
ggplot() +
geom_area(aes(y=population_without_inst_ship, x=year2), alpha=0.5, colour = "purple", fill="purple") +
geom_point(aes(y=population_without_inst_ship, x=year2), colour = "purple") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: population excluding institutionalised and shipping",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
There are 5 Divisions containing 37 Wards in the Glasgow Corporation, with consistent boundaries over time.
#look-up table for divisions and wards
ward_lookup <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "divisions_wards")
ward_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "ward_population")
ward_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
ward_pops <- ward_pops %>%
mutate(year2 = year+0.5)
#Get the Division population
division_pops <- ward_pops %>%
group_by(division, year) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship, na.rm = TRUE),
institutions = sum(institutions, na.rm = TRUE),
shipping = sum(shipping, na.rm = TRUE),
total_population = sum(total_population, na.rm = TRUE))
`summarise()` has grouped output by 'division'. You can override using the `.groups` argument.
division_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Plot the overall population by Division and Ward
division_pops %>%
mutate(year2 = year+0.5) %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(division~.) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name = "") +
scale_colour_brewer(palette = "Set3", name = "") +
labs(
title = "Glasgow Corporation: total population by Division",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
ward_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division)) +
geom_point(aes(y=total_population, x=year2, colour=division), colour="black") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(ward~., ncol=6) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name="Division") +
scale_colour_brewer(palette = "Set3", name = "Division") +
labs(
x = "",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
ggsave(here("figures/s2.tiff"), height=14, width=12)
Approximately, how many person-years of follow-up do we have?
overall_pops %>%
ungroup() %>%
summarise(across(year, length, .names = "years"),
across(c(population_without_inst_ship, total_population), sum)) %>%
mutate(across(where(is.double), comma)) %>%
datatable()
NA
NA
Change in population by ward
ward_pops %>%
group_by(ward) %>%
summarise(pct_change_pop = (last(population_without_inst_ship) - first(population_without_inst_ship))/first(population_without_inst_ship)) %>%
mutate(pct_change_pop = percent(pct_change_pop)) %>%
arrange(pct_change_pop) %>%
datatable()
NA
NA
NA
age_sex <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "age_sex_population") %>%
pivot_longer(cols = c(male, female),
names_to = "sex")
#collapse down to smaller age groups to be manageable
age_sex <- age_sex %>%
ungroup() %>%
mutate(age = case_when(age == "0 to 4" ~ "00 to 04",
age == "5 to 9" ~ "05 to 14",
age == "10 to 14" ~ "05 to 14",
age == "15 to 19" ~ "15 to 24",
age == "20 to 24" ~ "15 to 24",
age == "25 to 29" ~ "25 to 34",
age == "30 to 34" ~ "25 to 34",
age == "35 to 39" ~ "35 to 44",
age == "40 to 44" ~ "35 to 44",
age == "45 to 49" ~ "45 to 59",
age == "50 to 54" ~ "45 to 59",
age == "55 to 59" ~ "45 to 59",
TRUE ~ "60 & up")) %>%
group_by(year, age, sex) %>%
mutate(value = sum(value)) %>%
ungroup()
m_age_sex <- lm(value ~ splines::ns(year, knots = 3)*age*sex, data = age_sex)
summary(m_age_sex)
Warning in summary.lm(m_age_sex) :
essentially perfect fit: summary may be unreliable
Call:
lm(formula = value ~ splines::ns(year, knots = 3) * age * sex,
data = age_sex)
Residuals:
Min 1Q Median 3Q Max
-2.107e-10 -7.560e-13 0.000e+00 0.000e+00 2.107e-10
Coefficients: (14 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.222e+04 3.820e-10 1.367e+14 <2e-16 ***
splines::ns(year, knots = 3)1 -8.043e+03 7.621e-10 -1.055e+13 <2e-16 ***
splines::ns(year, knots = 3)2 NA NA NA NA
age05 to 14 3.669e+04 4.679e-10 7.843e+13 <2e-16 ***
age15 to 24 -3.893e+03 4.679e-10 -8.320e+12 <2e-16 ***
age25 to 34 -3.996e+04 4.679e-10 -8.540e+13 <2e-16 ***
age35 to 44 -4.230e+04 4.679e-10 -9.040e+13 <2e-16 ***
age45 to 59 5.459e+04 4.411e-10 1.238e+14 <2e-16 ***
age60 & up 7.533e+04 4.126e-10 1.826e+14 <2e-16 ***
sexmale 3.374e+03 5.402e-10 6.244e+12 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14 -1.863e+03 9.334e-10 -1.996e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14 NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24 7.533e+04 9.334e-10 8.070e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24 NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34 1.325e+05 9.334e-10 1.420e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34 NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44 1.380e+05 9.334e-10 1.479e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44 NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59 3.474e+03 8.800e-10 3.948e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59 NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up -8.453e+04 8.232e-10 -1.027e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up NA NA NA NA
splines::ns(year, knots = 3)1:sexmale -1.994e+03 1.078e-09 -1.850e+12 <2e-16 ***
splines::ns(year, knots = 3)2:sexmale NA NA NA NA
age05 to 14:sexmale 1.053e+04 6.617e-10 1.592e+13 <2e-16 ***
age15 to 24:sexmale 2.352e+04 6.617e-10 3.555e+13 <2e-16 ***
age25 to 34:sexmale 1.355e+04 6.617e-10 2.047e+13 <2e-16 ***
age35 to 44:sexmale -1.727e+03 6.617e-10 -2.611e+12 <2e-16 ***
age45 to 59:sexmale 2.774e+03 6.238e-10 4.446e+12 <2e-16 ***
age60 & up:sexmale -7.761e+04 5.835e-10 -1.330e+14 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14:sexmale -2.049e+04 1.320e-09 -1.552e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24:sexmale -6.780e+04 1.320e-09 -5.136e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34:sexmale -3.804e+04 1.320e-09 -2.882e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44:sexmale -1.171e+04 1.320e-09 -8.874e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59:sexmale -3.473e+04 1.244e-09 -2.791e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up:sexmale 1.056e+05 1.164e-09 9.071e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up:sexmale NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.755e-11 on 44 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 1.714e+29 on 27 and 44 DF, p-value: < 2.2e-16
age_levels <- age_sex %>% select(age) %>% distinct() %>% pull()
age_sex_nd <-
crossing(
age=age_levels,
sex=c("male", "female"),
year = 1950:1963
)
pred_pops <- age_sex_nd %>% modelr::add_predictions(m_age_sex)
Warning in predict.lm(model, data) :
prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
pred_pops %>%
ggplot(aes(x=year, y=pred, colour=age)) +
geom_line() +
geom_point() +
facet_grid(sex~.) +
scale_y_continuous(labels = comma, limits = c(0, 125000))
#How well do they match up with our overall populations?
pred_pops %>%
group_by(year) %>%
summarise(sum_pred_pop = sum(pred)) %>%
right_join(overall_pops) %>%
select(year, sum_pred_pop, population_without_inst_ship, total_population) %>%
pivot_longer(cols = c(sum_pred_pop, population_without_inst_ship, total_population)) %>%
ggplot(aes(x=year, y=value, colour=name)) +
geom_point() +
scale_y_continuous(labels = comma, limits = c(800000, 1250000))
Joining with `by = join_by(year)`
pred_pops %>%
group_by(year, sex) %>%
summarise(sum = sum(pred)) %>%
group_by(year) %>%
mutate(sex_ratio = first(sum)/last(sum))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
Population pyramids
label_abs <- function(x) {
comma(abs(x))
}
pred_pops %>%
ungroup() %>%
group_by(year) %>%
mutate(year_pop = sum(pred),
age_sex_pct = percent(pred/year_pop, accuracy=0.1)) %>%
mutate(sex = case_when(sex=="male" ~ "Male",
sex=="female" ~ "Female")) %>%
ggplot(
aes(x = age, fill = sex,
y = ifelse(test = sex == "Female",yes = -pred, no = pred))) +
geom_bar(stat = "identity") +
geom_text(aes(label = age_sex_pct),
position= position_stack(vjust=0.5), colour="black", size=2.5) +
facet_wrap(year~., ncol=7) +
coord_flip() +
scale_y_continuous(labels = label_abs) +
scale_fill_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
theme_ggdist() +
theme(axis.text.x = element_text(angle=90, hjust = 1, vjust=0.5),
legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s3.tiff"), width=10)
Saving 10 x 4.51 in image
Not perfect, but resonably good. But ahhhhh… the age groups don’t align with the case notification age groups! Come back to think about this later.
Import the tuberculosis cases dataset
Overall, by year.
cases_by_year <- read_xlsx("2023-11-28_glasgow-acf.xlsx", sheet = "by_year")
cases_by_year%>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
cases_by_year <- cases_by_year %>%
mutate(year2 = year+0.5)
Plot the overall number of case notified per year, by pulmonary and extra pulmonary classification.
cases_by_year %>%
select(-total_notifications, -year) %>%
pivot_longer(cols = c(pulmonary_notifications, `non-pulmonary_notifications`)) %>%
mutate(name = case_when(name == "pulmonary_notifications" ~ "Pulmonary TB",
name == "non-pulmonary_notifications" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
Read in the datasets and merge together.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
ward_sheets <- enframe(all_sheets) %>%
filter(grepl("by_ward", value)) %>%
pull(value)
cases_by_ward_sex_year <- map_df(ward_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_ward_sex_year %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Aggregate together to get cases by division
cases_by_division <- cases_by_ward_sex_year %>%
left_join(ward_lookup) %>%
group_by(division, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE))
Joining with `by = join_by(ward)`
`summarise()` has grouped output by 'division', 'year'. You can override using the `.groups` argument.
#shift year to midpoint
cases_by_division <- cases_by_division %>%
mutate(year2 = year+0.5) %>%
ungroup()
cases_by_division %>%
select(-year2) %>%
select(year, everything()) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
cases_by_division %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
cases_by_ward <- cases_by_ward_sex_year %>%
group_by(ward, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup()
`summarise()` has grouped output by 'ward', 'year'. You can override using the `.groups` argument.
cases_by_ward %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
select(year, everything()) %>%
datatable()
#shift year to midpoint
cases_by_ward <- cases_by_ward %>%
mutate(year2 = year+0.5)
cases_by_ward %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.8) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
As we don’t have denominators, we will just model the change in counts.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
age_sex_sheets <- enframe(all_sheets) %>%
filter(grepl("by_age_sex", value)) %>%
pull(value)
cases_by_age_sex <- map_df(age_sex_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_age_sex %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
What percentage of adults (15+ participated in the intervention in 1957)?
Note that in the Report of Sir Kenneth Cowan, we have the following estimates of participation (we will use these for the manuscript, as they are not based on my estimates)
male_adult_resident_participation <- 281875
female_adult_resident_participation <- 340474
male_adult_resident_population <- 381713
female_adult_resident_population <- 437588
#overall participation
(male_adult_resident_participation+female_adult_resident_participation)/(male_adult_resident_population+female_adult_resident_population)
[1] 0.7596097
#male participation
male_adult_resident_participation/male_adult_resident_population
[1] 0.7384475
#female participation
female_adult_resident_participation/female_adult_resident_population
[1] 0.7780698
Look at uptake of screening by age and sex
Combine figure with table for single figure.
ggsave(here("figures/s4.tiff"), height=10)
Saving 7 x 10 in image
Uptake by division
uptake_division <- read_xlsx("2024-03-26_mass_xray_uptake.xlsx", sheet = "uptake_division")
division_pops %>%
filter(year==1957) %>%
select(division, population_without_inst_ship) %>%
left_join(uptake_division) %>%
mutate(pct_pop_examined = examined/population_without_inst_ship)
Joining with `by = join_by(division)`
Now calculate case notification rates per 100,000 population
Merge the notification and population denominator datasets together.
Here we need to include the whole population (with shipping and institutions) as they are included in the notifications.
overall_inc <- overall_pops %>%
left_join(cases_by_year)
Joining with `by = join_by(year, year2)`
overall_inc <- overall_inc %>%
mutate(inc_pulm_100k = pulmonary_notifications/total_population*100000,
inc_ep_100k = `non-pulmonary_notifications`/total_population*100000,
inc_100k = total_notifications/total_population*100000)
overall_inc %>%
select(year, inc_100k, inc_pulm_100k, inc_ep_100k) %>%
mutate_at(.vars = vars(inc_100k, inc_pulm_100k, inc_ep_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
overall_inc %>%
select(year2, inc_pulm_100k, inc_ep_100k) %>%
pivot_longer(cols = c(inc_pulm_100k, `inc_ep_100k`)) %>%
mutate(name = case_when(name == "inc_pulm_100k" ~ "Pulmonary TB",
name == "inc_ep_100k" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Change in case notification rates pre-intervention
#pre-ACF
overall_inc %>%
filter(year %in% 1950:1956) %>%
summarise(change = (((last(inc_pulm_100k)-first(inc_pulm_100k))/first(inc_pulm_100k))/7)*100)
#post-ACF
overall_inc %>%
filter(year %in% 1958:1963) %>%
summarise(change = (((last(inc_pulm_100k)-first(inc_pulm_100k))/first(inc_pulm_100k))/6)*100)
NA
division_inc <- division_pops %>%
left_join(cases_by_division)
Joining with `by = join_by(division, year)`
division_inc <- division_inc %>%
mutate(inc_100k = cases/total_population*100000)
division_inc %>%
select(year, division, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
division_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Here we will filter out the institutions and harbour from the denominators, as we don’t have reliable population denominators for them.
ward_inc <- ward_pops %>%
left_join(cases_by_ward)
Joining with `by = join_by(ward, year, year2)`
ward_inc <- ward_inc %>%
mutate(inc_100k = cases/population_without_inst_ship*100000)
ward_inc %>%
select(year, ward, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ward_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Incidence (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
NA
NA
On a map
st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(legend.position = "top",
legend.key.width = unit(2, "cm"),
panel.border = element_rect(colour = "grey78", fill=NA)) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
Import the TB mortality data.
First, overall deaths. Note that in the original reports, we have a pulmonary TB death rate per million for all years, and numbers of pulmonary TB deaths for each year apart from 1950.
#get the overall mortality sheets
deaths_sheets <- enframe(all_sheets) %>%
filter(grepl("deaths", value)) %>%
pull(value)
overall_deaths <- map_df(deaths_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
overall_deaths %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
NA
Plot the raw numbers of pulmonary deaths
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_deaths)) +
geom_line(colour = "#DE0D92") +
geom_point(colour = "#DE0D92") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
labs(y="Pulmonary TB deaths per year",
x = "Year",
title = "Numbers of pulmonary TB deaths",
subtitle = "Glasgow, 1950-1963",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: no data for 1950") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).
Now the incidence of pulmonary TB death
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_death_rate_per_100k)) +
geom_line(colour = "#4D6CFA") +
geom_point(colour = "#4D6CFA") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(y="Annual incidence of death (per 100,000)",
x = "Year",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s8.tiff"), width=10)
Saving 10 x 4.51 in image
Make Table 1 here, and save for publication.
overall_pops %>%
select(year, total_population) %>%
left_join(overall_inc %>%
select(year,
pulmonary_notifications, inc_pulm_100k,
`non-pulmonary_notifications`, inc_ep_100k,
total_notifications, inc_100k)) %>%
left_join(overall_deaths %>%
select(year,
pulmonary_deaths, pulmonary_death_rate_per_100k)) %>%
mutate(percent_pulmonary = percent(pulmonary_notifications/(total_notifications ), accuracy=0.1)) %>%
mutate(across(where(is.numeric) & !(year), ~round(., digits=1))) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.)))
Joining with `by = join_by(year)`
Joining with `by = join_by(year)`
Comparison fo age-sex distribution of cases in 1950-1956 vs. 1957
label_abs2 <- function(x) {
percent(abs(x))
}
cases_by_age_sex %>%
ungroup() %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(acf_period, age, sex) %>%
summarise(cases = sum(cases)) %>%
ungroup() %>%
group_by(acf_period) %>%
mutate(period_total = sum(cases)) %>%
mutate(pct = cases/period_total) %>%
mutate(pct2 = case_when(sex=="F" ~ -pct,
TRUE ~ pct)) %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Pre-ACF",
acf_period=="b. acf" ~ "ACF",
acf_period=="c. post-acf" ~ "Post-ACF")) %>%
ggplot() +
geom_vline(aes(xintercept=0), linetype=2) +
geom_point(aes(x=pct2,y=age, colour=fct_relevel(acf_period,
"Pre-ACF",
"ACF",
"Post-ACF")), stat="identity") +
scale_x_continuous(labels=label_abs2, limits = c(-0.2, 0.2)) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA")) +
theme_grey(base_family = "Aptos") +
labs(x= "<- Female Percent of cases Male ->",
y="") +
theme(legend.title = element_blank(),
legend.position = "bottom")
`summarise()` has grouped output by 'acf_period', 'age'. You can override using the `.groups` argument.
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)) :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
Unable to calculate text width/height (using zero)
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
no font could be found for family "Aptos"
ggsave(here("figures/s5.tiff"))
Saving 7.29 x 4.51 in image
Prepare the datasets for modelling
mdata <- ward_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup()
mdata_extrapulmonary <- ward_inc %>%
filter(tb_type=="Non-Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup() %>%
filter(year<=1961) #no data for 1962 and 1963
#scaffold for overall predictions
overall_scaffold <- mdata %>%
select(year, year2, y_num, acf_period, population_without_inst_ship, ward, cases) %>%
group_by(year, year2, y_num, acf_period) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship),
cases = sum(cases)) %>%
ungroup() %>%
mutate(inc_100k = cases/population_without_inst_ship*100000) %>%
left_join(mdata_extrapulmonary %>% group_by(year) %>%
summarise(cases_extrapulmonary = sum(cases))) %>%
mutate(inc_100k_extrapulmonary = cases_extrapulmonary/population_without_inst_ship*100000)
`summarise()` has grouped output by 'year', 'year2', 'y_num'. You can override using the `.groups` argument.
Joining with `by = join_by(year)`
Look at the mean and variance of counts (counts of pulmonary notifications are what we are predicting)
#Mean of counts per year
mean(mdata$cases)
[1] 48.32819
#variance of counts per year
var(mdata$cases)
[1] 915.5749
Quite a bit of over-dispersion here, so negative binomial distribution might be a better choice of distributional family than Poisson.
Fit the model with the data
m_pulmonary <- brm(
cases ~ 0 + Intercept + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1), class=b, coef = "Intercept") +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(4), class=cor),
control = list(adapt_delta = 0.9))
Compiling Stan program...
Start sampling
starting worker pid=42892 on localhost:11039 at 13:53:33.579
starting worker pid=42905 on localhost:11039 at 13:53:33.668
starting worker pid=42918 on localhost:11039 at 13:53:33.758
starting worker pid=42931 on localhost:11039 at 13:53:33.847
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000156 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.56 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000154 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.54 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000155 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.55 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000162 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.62 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 22.133 seconds (Warm-up)
Chain 2: 17.498 seconds (Sampling)
Chain 2: 39.631 seconds (Total)
Chain 2:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 22.345 seconds (Warm-up)
Chain 3: 17.384 seconds (Sampling)
Chain 3: 39.729 seconds (Total)
Chain 3:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 23.301 seconds (Warm-up)
Chain 1: 17.586 seconds (Sampling)
Chain 1: 40.887 seconds (Total)
Chain 1:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 23.909 seconds (Warm-up)
Chain 4: 17.685 seconds (Sampling)
Chain 4: 41.594 seconds (Total)
Chain 4:
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
#check model diagnostics
summary(m_pulmonary)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 0 + Intercept + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata (Number of observations: 518)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Multilevel Hyperparameters:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.25 0.04 0.19 0.33 1.00 1400 2056
sd(y_num) 0.02 0.01 0.01 0.03 1.00 837 699
sd(acf_periodb.acf) 0.06 0.04 0.00 0.16 1.00 1546 1825
sd(acf_periodc.postMacf) 0.12 0.06 0.00 0.24 1.01 646 779
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.02 1.00 1234 1464
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.02 1.01 422 1406
cor(Intercept,y_num) -0.46 0.19 -0.76 -0.02 1.00 1781 1776
cor(Intercept,acf_periodb.acf) -0.22 0.29 -0.71 0.39 1.00 3134 2827
cor(y_num,acf_periodb.acf) -0.04 0.27 -0.55 0.50 1.00 5046 2574
cor(Intercept,acf_periodc.postMacf) -0.14 0.24 -0.58 0.36 1.00 4138 2791
cor(y_num,acf_periodc.postMacf) 0.09 0.25 -0.39 0.56 1.00 2886 3023
cor(acf_periodb.acf,acf_periodc.postMacf) 0.06 0.28 -0.47 0.58 1.00 1910 2495
cor(Intercept,y_num:acf_periodb.acf) -0.24 0.28 -0.71 0.35 1.00 2814 3021
cor(y_num,y_num:acf_periodb.acf) -0.05 0.27 -0.55 0.47 1.00 4863 3111
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.06 0.28 -0.59 0.48 1.00 4367 3150
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.07 0.27 -0.47 0.58 1.00 3450 2859
cor(Intercept,y_num:acf_periodc.postMacf) -0.02 0.26 -0.51 0.48 1.00 3896 2794
cor(y_num,y_num:acf_periodc.postMacf) -0.03 0.27 -0.53 0.49 1.00 2307 2904
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.04 0.28 -0.49 0.56 1.00 2463 2964
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.08 0.29 -0.62 0.49 1.00 2300 2996
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.06 0.28 -0.49 0.58 1.01 1916 2985
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -6.13 0.05 -6.23 -6.04 1.01 815 1324
y_num -0.02 0.01 -0.03 -0.01 1.00 2151 2996
acf_periodb.acf 0.02 1.00 -1.98 1.99 1.00 3423 2951
acf_periodc.postMacf 0.04 0.10 -0.16 0.24 1.00 3399 3192
y_num:acf_periodb.acf 0.08 0.12 -0.16 0.33 1.00 3432 2931
y_num:acf_periodc.postMacf -0.05 0.01 -0.07 -0.03 1.00 3212 3194
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 92.51 20.80 61.39 141.90 1.00 2423 2772
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_pulmonary)
pp_check(m_pulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Nicer version of trace plots for supplemental material
as_draws_df(m_pulmonary) %>%
bayesplot::mcmc_rank_overlay(pars = vars(b_Intercept:shape),
facet_args = list(ncol = 4)) +
scale_colour_scico_d(palette = "managua", name = "Chain") +
theme_ggdist()+
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "top")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Nicer version of table of parameters for supplement
summarise_draws(m_pulmonary) %>%
mutate(across(c(mean:ess_tail), comma, accuracy=0.01)) %>%
write_csv(here("figures/s1_table.csv"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(mean:ess_tail), comma, accuracy = 0.01)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Summarise the posterior in graphical form
f1b <- plot_counterfactual(model_data = overall_scaffold, model = m_pulmonary,
population_denominator = population_without_inst_ship, outcome = inc_100k, grouping_var=NULL,
re_formula = NA)
f1b
Warning: The S3 guide system was deprecated in ggplot2 3.5.0.
ℹ It has been replaced by a ggproto system that can be extended.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Make this into a figure combined with the map of empirical data
f1a <- st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k), colour="grey98", lwd=0.01) +
facet_wrap(year~., ncol = 7) +
scale_fill_scico(name="CNR (per 100,000)",
palette = "acton", direction = -1) +
theme_grey() +
theme(legend.position = "top",
#legend.key.width = unit(1, "cm"),
legend.title.align = 0.5,
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank(),
legend.title = element_text(size=10))
Joining with `by = join_by(division, ward, ward_number)`
Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2 3.5.0.
ℹ Please use theme(legend.title = element_text(hjust)) instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
(f1a / f1b) + plot_annotation(tag_levels = "A")
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/f1.tiff"), width=7, height=8)
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Summary of change in notifications numerically
overall_change <- summarise_change(model_data=overall_scaffold, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
Numbers of pulmonary TB cases averted compared to counterfactual per year.
overall_pulmonary_counterf <- calculate_counterfactual(model_data = overall_scaffold, model=m_pulmonary, population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`
Joining with `by = join_by(.draw)`
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Total pulmonary TB cases averted between 1958 and 1963
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
What are the correlations between peak, level, and slope?
#RR.peak histogram
a <- overall_change$peak_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.peak",
y="")
#RR. level histogram
b <- overall_change$level_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.level",
y="")
#RR.slope histogram
c <- overall_change$slope_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
#scale_x_continuous(limits = c(0, 6)) +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.slope",
y="")
#Correlation between RR.peak and RR.level
cor_rr_peak_rr_level <- round(cor(pluck(overall_change$peak_draws$estimate), pluck(overall_change$level_draws$estimate)), digits = 2)
#Correlation between RR.peak and RR.slope
cor_rr_peak_rr_slope <- round(cor(pluck(overall_change$peak_draws$estimate), pluck(overall_change$slope_draws$estimate)), digits = 2)
#Correlation between RR.level and RR.slope
cor_rr_level_rr_slope <- round(cor(pluck(overall_change$level_draws$estimate), pluck(overall_change$slope_draws$estimate)), digits = 2)
#plot of correlation between RR.peak and RR.level
d <- bind_cols(RR.peak=pluck(overall_change$peak_draws$estimate),
RR.level =pluck(overall_change$level_draws$estimate)) %>%
ggplot(aes(y=RR.peak, x = RR.level)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=2.2, x=0.58, label=cor_rr_peak_rr_level), colour="firebrick") +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.peak and RR.slope
e <- bind_cols(RR.peak=pluck(overall_change$peak_draws$estimate),
RR.slope =pluck(overall_change$slope_draws$estimate)) %>%
ggplot(aes(y=RR.peak, x = RR.slope)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=2.1, x=0.65, label=cor_rr_peak_rr_slope), colour="firebrick") +
#scale_x_continuous(limits = c(0, 6)) +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.level and RR.slope
f <- bind_cols(RR.level=pluck(overall_change$level_draws$estimate),
RR.slope =pluck(overall_change$slope_draws$estimate)) %>%
ggplot(aes(y=RR.level, x = RR.slope)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=0.75, x=0.65, label=cor_rr_level_rr_slope), colour="firebrick") +
#scale_x_continuous(limits = c(0, 6)) +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
(plot_spacer() + plot_spacer() + c) /
(plot_spacer() + b + f) /
(a + d + e)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 0.75, x = 0.65, label = cor_rr_level_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 2.2, x = 0.58, label = cor_rr_peak_rr_level), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
Warning in geom_text(aes(y = 2.1, x = 0.65, label = cor_rr_peak_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
ggsave(here("figures/s9.tiff"), width=8, height=8)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 0.75, x = 0.65, label = cor_rr_level_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning in geom_text(aes(y = 2.2, x = 0.58, label = cor_rr_peak_rr_level), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
Warning in geom_text(aes(y = 2.1, x = 0.65, label = cor_rr_peak_rr_slope), :
All aesthetics have length 1, but the data has 4000 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
`geom_smooth()` using formula = 'y ~ x'
Plot the counterfactual at ward level
plot_counterfactual(model_data = mdata, model=m_pulmonary, outcome = inc_100k, population_denominator = population_without_inst_ship,
grouping_var = ward, ward, re_formula= ~(1 + y_num*acf_period | ward)) +
scale_y_continuous(limits = c(0,500))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/s6.tiff"), width=16, height=12)
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
Summary of change in notifications at ward level
ward_change <- summarise_change(model_data=mdata, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=ward,
re_formula = ~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw', 'y_num'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
ward_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
#plot these in a figure
ward_effects <- ward_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
bind_rows(overall_change$peak_summary) %>%
bind_rows(overall_change$level_summary) %>%
bind_rows(overall_change$slope_summary) %>%
mutate_at(.vars = vars(estimate:.upper),
.funs = funs(as.numeric)) %>%
select(measure, everything()) %>%
mutate(estimate = as.double(estimate)) %>%
full_join(glasgow_wards_1951) %>%
mutate(ward2 = paste0(ward_number, ". ", ward)) %>%
mutate(ward2 = case_when(is.na(ward) ~ "Overall",
TRUE ~ ward2)) %>%
st_as_sf()
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:
# Simple named list: list(mean = mean, median = median)
# Auto named with `tibble::lst()`: tibble::lst(mean, median)
# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Joining with `by = join_by(ward)`
#function for plotting choropleth maps
plot_ward_effect <- function(data, measure){
{{data}} %>%
filter(measure == {{measure}}) %>%
ggplot() +
geom_sf(aes(fill=estimate)) +
geom_sf_label(aes(label = ward_number), size=3, fill=NA, label.size = NA, colour="black") +
scale_fill_scico(trans="log", palette = "roma", midpoint = 0, limits=c(0.5,2.25),
breaks = c(0.5, 0.75, 1, 1.5, 2, 2.5), labels = c(0.5, 0.75, 1, 1.5, 2, 2.5),
name="Relative rate") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
axis.text.x=element_text(angle=45, hjust=1)) +
labs(x="", y="")
}
#function for plotting catapiller plots
plot_ward_cat <- function(data, measure, scale){
ggplot() +
geom_hline(aes(yintercept=1), linetype=2) +
geom_pointrange(data = {{data}} %>%
filter(measure=={{measure}}) %>%
filter(!is.na(ward)),
aes(y=estimate, ymin=.lower, ymax=.upper,
x=fct_reorder(ward2, estimate), colour=estimate)) +
geom_pointrange(data = {{data}} %>%
filter(measure=={{measure}}) %>%
filter(is.na(ward)),
aes(y=estimate, ymin=.lower, ymax=.upper,
x=ward2), colour="black") +
coord_flip() +
scale_colour_scico(trans="log", palette = "roma", midpoint = 0, limits=c(0.5,2.25),
breaks = c(0.5, 0.75, 1, 1.5, 2, 2.5), labels = c(0.5, 0.75, 1, 1.5, 2, 2.5),
name="Relative rate") +
scale_y_continuous() +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x = "",
y = "Relative rate (95% UI)")+
guides(x = "axis_truncated", y = "axis_truncated")
}
ward_peak_i <- plot_ward_effect(data = ward_effects, measure = "RR.peak") + ggtitle("Peak effect")
ward_level_i <- plot_ward_effect(data = ward_effects, measure = "RR.level") + ggtitle("Level effect")
ward_slope_i <- plot_ward_effect(data = ward_effects, measure = "RR.slope") + ggtitle("Slope effect")
ward_peak_ii <- plot_ward_cat(data = ward_effects, measure = "RR.peak") + ggtitle("Peak effect")
ward_level_ii <- plot_ward_cat(data = ward_effects, measure = "RR.level") + ggtitle("Level effect")
ward_slope_ii <- plot_ward_cat(data = ward_effects, measure = "RR.slope") + ggtitle("Slope effect")
s4 <- (ward_peak_i + ward_level_i + ward_slope_i) /
(ward_peak_ii + ward_level_ii + ward_slope_ii)
s4[[1]] <- s4[[1]] + plot_layout(tag_level = 'new')
s4[[2]] <- s4[[2]] + plot_layout(tag_level = 'new')
s4 + plot_annotation(tag_levels = c('A', '1')) + plot_layout(guides = 'collect') &
theme(legend.position='bottom',
legend.key.width = unit(3, "cm"))
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label()`).
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label()`).
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label()`).
ggsave(here("figures/f2.tiff"), width = 16, height=12)
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label()`).
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label()`).
Warning in st_point_on_surface.sfc(sf::st_zm(x)) :
st_point_on_surface may not give correct results for longitude/latitude data
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label()`).
Calculate the counterfactual per ward
ward_pulmonary_counterf <- calculate_counterfactual(model_data = mdata, model=m_pulmonary,
population_denominator = population_without_inst_ship,
grouping_var = ward, re_formula=~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
Joining with `by = join_by(year, population_without_inst_ship, .draw, ward)`
Joining with `by = join_by(.draw, ward)`
ward_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Overall counterfactual per ward
ward_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Now we will model the extra-pulmonary TB notification rate. Struggling a bit with negative binomial model, so revert to Poisson.
m_extrapulmonary <- brm(
cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata_extrapulmonary,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1000), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(2), class=cor))
Compiling Stan program...
Start sampling
starting worker pid=59760 on localhost:11039 at 16:32:19.174
starting worker pid=59773 on localhost:11039 at 16:32:19.265
starting worker pid=59786 on localhost:11039 at 16:32:19.355
starting worker pid=59799 on localhost:11039 at 16:32:19.443
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000173 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.73 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000158 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.58 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000164 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.64 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000161 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.61 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 8.734 seconds (Warm-up)
Chain 2: 3.992 seconds (Sampling)
Chain 2: 12.726 seconds (Total)
Chain 2:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 8.663 seconds (Warm-up)
Chain 3: 3.95 seconds (Sampling)
Chain 3: 12.613 seconds (Total)
Chain 3:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 8.614 seconds (Warm-up)
Chain 1: 4.872 seconds (Sampling)
Chain 1: 13.486 seconds (Total)
Chain 1:
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 9.111 seconds (Warm-up)
Chain 4: 6.485 seconds (Sampling)
Chain 4: 15.596 seconds (Total)
Chain 4:
Warning: There were 1 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
Warning message:
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
summary(m_extrapulmonary)
Warning: There were 1 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 1 + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata_extrapulmonary (Number of observations: 444)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Multilevel Hyperparameters:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.33 0.06 0.23 0.46 1.00 1498 2238
sd(y_num) 0.02 0.01 0.00 0.04 1.00 480 1177
sd(acf_periodb.acf) 0.11 0.09 0.01 0.32 1.00 2076 1877
sd(acf_periodc.postMacf) 0.13 0.09 0.01 0.35 1.01 1333 2181
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.04 1.00 1971 1580
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.04 1.00 1101 1431
cor(Intercept,y_num) -0.11 0.31 -0.66 0.51 1.00 2112 2441
cor(Intercept,acf_periodb.acf) -0.02 0.33 -0.64 0.60 1.00 4848 2867
cor(y_num,acf_periodb.acf) -0.01 0.33 -0.64 0.63 1.00 4347 3132
cor(Intercept,acf_periodc.postMacf) -0.08 0.32 -0.67 0.56 1.00 3794 2857
cor(y_num,acf_periodc.postMacf) -0.05 0.33 -0.68 0.58 1.00 2939 2782
cor(acf_periodb.acf,acf_periodc.postMacf) 0.02 0.33 -0.61 0.64 1.00 2894 2908
cor(Intercept,y_num:acf_periodb.acf) -0.02 0.33 -0.64 0.61 1.00 4859 2790
cor(y_num,y_num:acf_periodb.acf) -0.01 0.33 -0.63 0.62 1.00 3994 3027
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.07 0.34 -0.70 0.59 1.00 3289 3143
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.01 0.33 -0.62 0.61 1.00 3534 3556
cor(Intercept,y_num:acf_periodc.postMacf) -0.15 0.31 -0.69 0.51 1.00 3533 2900
cor(y_num,y_num:acf_periodc.postMacf) -0.06 0.33 -0.66 0.60 1.00 2552 3059
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.03 0.34 -0.60 0.64 1.00 2220 3046
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.08 0.35 -0.72 0.61 1.00 2585 2729
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.03 0.34 -0.62 0.67 1.00 2197 2676
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -7.93 0.07 -8.08 -7.79 1.00 1713 2651
y_num -0.09 0.01 -0.11 -0.07 1.00 4226 3004
acf_periodb.acf 0.05 0.99 -1.87 2.00 1.00 2042 2679
acf_periodc.postMacf -0.35 0.39 -1.11 0.39 1.00 2385 2473
y_num:acf_periodb.acf -0.02 0.12 -0.26 0.22 1.00 2041 2404
y_num:acf_periodc.postMacf 0.02 0.04 -0.06 0.10 1.00 2356 2564
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 94.70 67.21 26.10 272.34 1.00 4055 3328
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_extrapulmonary)
pp_check(m_extrapulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise in plot
plot_counterfactual(model_data = overall_scaffold %>% filter(year<=1961), model=m_extrapulmonary,
population_denominator = population_without_inst_ship, outcome=inc_100k_extrapulmonary, re_formula = NA) +
scale_y_continuous(limits = c(0,50))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/s10.tiff"), width=10)
Saving 10 x 4.51 in image
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
Summarise numerically.
overall_change_extrapulmonary <- summarise_change(model_data=overall_scaffold, model=m_extrapulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
Numbers of extra-pulmonary TB cases averted overall.
overall_ep_counterf <- calculate_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary,
population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`
Joining with `by = join_by(.draw)`
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Total extrapulmonary TB cases averted between 1958 and 1963
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Make into Table 2
bind_rows(
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "PTB_ward"),
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "PTB_overall"),
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "EPTB"),
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "EPTB overall")
) %>%
select(model, year, diff_inc100k, diff_inc100k.lower:rr_inc100k.upper,
cases_averted:cases_averted.upper,
pct_change:pct_change.upper) %>%
transmute(model=model, year=year,
diff_cnr = glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr = glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"),
cases_averted = glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue("{pct_change} ({pct_change.lower} to {pct_change.upper})")) %>%
write_csv(here("figures/table2.csv"))
Ward-level extra-pulmonary estimates in graphical form.
plot_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary, outcome = inc_100k,
population_denominator = population_without_inst_ship, grouping_var = ward,re_formula =~(y_num*acf_period | ward),
ward) + scale_y_continuous(limits= c(0,75))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
ggsave(here("figures/s11.tiff"), width=10, height=12)
Warning: Removed 37 rows containing missing values or values outside the scale range (`geom_line()`).
Numerical summary.
ward_change_extrapulmonary <- summarise_change(model_data = mdata_extrapulmonary, model = m_extrapulmonary,
population_denominator = population_without_inst_ship, grouping_var=ward,
re_formula = ~(y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw', 'y_num'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
ward_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
Fit the model
(Not rewritten the functions for this yet)
mdata_age_sex <- cases_by_age_sex %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
mutate(year2 = year+0.5) %>%
group_by(age, sex) %>%
mutate(y_num = row_number()) %>%
ungroup()
m_age_sex <- brm(
cases ~ y_num + (acf_period)*(age*sex) + (acf_period:y_num)*(age*sex),
data = mdata_age_sex,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b))
Compiling Stan program...
Start sampling
starting worker pid=59955 on localhost:11039 at 16:33:34.168
starting worker pid=59968 on localhost:11039 at 16:33:34.257
starting worker pid=59981 on localhost:11039 at 16:33:34.351
starting worker pid=59994 on localhost:11039 at 16:33:34.439
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 5.5e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.55 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 4.1e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.41 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 4.5e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.45 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 3.9e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 6.922 seconds (Warm-up)
Chain 2: 7.223 seconds (Sampling)
Chain 2: 14.145 seconds (Total)
Chain 2:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 6.971 seconds (Warm-up)
Chain 1: 7.951 seconds (Sampling)
Chain 1: 14.922 seconds (Total)
Chain 1:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 6.968 seconds (Warm-up)
Chain 3: 8.251 seconds (Sampling)
Chain 3: 15.219 seconds (Total)
Chain 3:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 7.126 seconds (Warm-up)
Chain 4: 8.202 seconds (Sampling)
Chain 4: 15.328 seconds (Total)
Chain 4:
Warning message:
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
Warning message:
package ‘StanHeaders’ was built under R version 4.3.3
summary(m_age_sex)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + (acf_period) * (age * sex) + (acf_period:y_num) * (age * sex)
Data: mdata_age_sex (Number of observations: 224)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 4.42 0.12 4.19 4.64 1.00 1415 2552
y_num -0.17 0.03 -0.23 -0.11 1.00 1302 2466
acf_periodb.acf -0.01 1.02 -2.04 2.00 1.00 7210 2491
acf_periodc.postMacf -0.49 0.33 -1.15 0.16 1.00 2357 2566
age06_15 0.63 0.15 0.33 0.94 1.00 2058 2788
age16_25 1.86 0.14 1.60 2.13 1.00 1655 2712
age26_35 1.13 0.14 0.85 1.40 1.00 1729 2902
age36_45 0.28 0.15 -0.02 0.57 1.00 2101 2790
age46_55 -0.62 0.18 -0.97 -0.28 1.00 2044 2768
age56_65 -1.07 0.19 -1.45 -0.71 1.00 2541 2475
age65P -1.63 0.22 -2.06 -1.22 1.00 2849 2848
sexM 0.16 0.15 -0.14 0.46 1.00 1406 2375
age06_15:sexM -0.43 0.21 -0.83 -0.02 1.00 1953 2438
age16_25:sexM -0.58 0.18 -0.94 -0.22 1.00 1673 2923
age26_35:sexM -0.34 0.19 -0.70 0.03 1.00 1732 2640
age36_45:sexM 0.24 0.20 -0.15 0.63 1.00 1860 2940
age46_55:sexM 1.15 0.22 0.71 1.57 1.00 1801 2217
age56_65:sexM 1.13 0.23 0.68 1.58 1.00 1997 2688
age65P:sexM 0.99 0.26 0.47 1.51 1.00 2488 2846
acf_periodb.acf:age06_15 0.01 0.98 -1.90 1.92 1.00 7432 3008
acf_periodc.postMacf:age06_15 -0.59 0.52 -1.58 0.43 1.00 3652 3358
acf_periodb.acf:age16_25 0.05 0.97 -1.83 1.95 1.00 6881 2982
acf_periodc.postMacf:age16_25 0.73 0.43 -0.10 1.58 1.00 3080 3224
acf_periodb.acf:age26_35 0.04 0.95 -1.82 1.91 1.00 8078 3377
acf_periodc.postMacf:age26_35 0.66 0.43 -0.17 1.50 1.00 3365 3143
acf_periodb.acf:age36_45 0.05 0.96 -1.86 1.91 1.00 6384 2598
acf_periodc.postMacf:age36_45 0.75 0.45 -0.15 1.62 1.00 3425 3123
acf_periodb.acf:age46_55 0.06 0.96 -1.81 1.92 1.00 6486 3069
acf_periodc.postMacf:age46_55 0.85 0.48 -0.11 1.82 1.00 3452 2999
acf_periodb.acf:age56_65 0.04 1.01 -1.95 2.04 1.00 7570 3142
acf_periodc.postMacf:age56_65 0.63 0.52 -0.37 1.64 1.00 3656 2661
acf_periodb.acf:age65P 0.05 1.02 -1.95 1.99 1.00 7361 2833
acf_periodc.postMacf:age65P 0.96 0.55 -0.12 2.06 1.00 3901 2909
acf_periodb.acf:sexM -0.01 0.99 -1.91 1.96 1.00 6593 2720
acf_periodc.postMacf:sexM -0.05 0.37 -0.78 0.69 1.00 2799 2989
y_num:acf_periodb.acf -0.10 0.13 -0.36 0.16 1.00 6943 2798
y_num:acf_periodc.postMacf 0.04 0.04 -0.03 0.12 1.00 2115 2514
acf_periodb.acf:age06_15:sexM 0.01 0.99 -1.89 1.95 1.00 5773 3114
acf_periodc.postMacf:age06_15:sexM -0.59 0.63 -1.82 0.66 1.00 4774 3262
acf_periodb.acf:age16_25:sexM 0.01 1.03 -2.00 2.05 1.00 6588 2695
acf_periodc.postMacf:age16_25:sexM 0.65 0.53 -0.37 1.70 1.00 3815 3544
acf_periodb.acf:age26_35:sexM 0.00 0.97 -1.87 1.91 1.00 6304 2724
acf_periodc.postMacf:age26_35:sexM 0.41 0.52 -0.61 1.43 1.00 4354 3045
acf_periodb.acf:age36_45:sexM -0.00 0.96 -1.90 1.86 1.00 6501 3017
acf_periodc.postMacf:age36_45:sexM 0.10 0.55 -0.97 1.16 1.00 4082 3044
acf_periodb.acf:age46_55:sexM -0.00 1.01 -1.97 2.04 1.00 6138 3169
acf_periodc.postMacf:age46_55:sexM 0.66 0.55 -0.41 1.75 1.00 3807 3054
acf_periodb.acf:age56_65:sexM 0.01 1.02 -1.96 2.02 1.00 5999 2750
acf_periodc.postMacf:age56_65:sexM 0.34 0.58 -0.80 1.45 1.00 4410 2601
acf_periodb.acf:age65P:sexM -0.02 1.00 -1.92 1.89 1.00 7236 3275
acf_periodc.postMacf:age65P:sexM 0.28 0.63 -0.93 1.51 1.00 4482 2948
y_num:acf_perioda.preMacf:age06_15 0.02 0.04 -0.05 0.10 1.00 1757 2661
y_num:acf_periodb.acf:age06_15 0.15 0.13 -0.10 0.41 1.00 6579 2817
y_num:acf_periodc.postMacf:age06_15 0.08 0.05 -0.02 0.17 1.00 3209 2988
y_num:acf_perioda.preMacf:age16_25 0.12 0.03 0.06 0.19 1.00 1487 2597
y_num:acf_periodb.acf:age16_25 0.24 0.13 -0.01 0.49 1.00 5988 3000
y_num:acf_periodc.postMacf:age16_25 -0.04 0.04 -0.12 0.04 1.00 2624 3146
y_num:acf_perioda.preMacf:age26_35 0.15 0.03 0.08 0.22 1.00 1508 2497
y_num:acf_periodb.acf:age26_35 0.31 0.12 0.07 0.55 1.00 7540 3386
y_num:acf_periodc.postMacf:age26_35 0.02 0.04 -0.06 0.10 1.00 2647 2940
y_num:acf_perioda.preMacf:age36_45 0.17 0.04 0.10 0.24 1.00 1676 2803
y_num:acf_periodb.acf:age36_45 0.40 0.13 0.16 0.65 1.00 5745 3142
y_num:acf_periodc.postMacf:age36_45 0.06 0.04 -0.02 0.14 1.00 2614 2787
y_num:acf_perioda.preMacf:age46_55 0.19 0.04 0.11 0.27 1.00 1816 2773
y_num:acf_periodb.acf:age46_55 0.44 0.13 0.19 0.69 1.00 5843 3198
y_num:acf_periodc.postMacf:age46_55 0.09 0.04 0.01 0.18 1.00 2820 2402
y_num:acf_perioda.preMacf:age56_65 0.17 0.05 0.08 0.26 1.00 2142 2731
y_num:acf_periodb.acf:age56_65 0.39 0.13 0.13 0.65 1.00 6828 2866
y_num:acf_periodc.postMacf:age56_65 0.11 0.05 0.02 0.20 1.00 3038 2886
y_num:acf_perioda.preMacf:age65P 0.23 0.05 0.13 0.32 1.00 2187 2906
y_num:acf_periodb.acf:age65P 0.43 0.14 0.16 0.70 1.00 6816 2802
y_num:acf_periodc.postMacf:age65P 0.11 0.05 0.01 0.20 1.00 3106 2633
y_num:acf_perioda.preMacf:sexM 0.01 0.04 -0.06 0.09 1.00 1356 2575
y_num:acf_periodb.acf:sexM -0.04 0.14 -0.31 0.23 1.00 5779 2827
y_num:acf_periodc.postMacf:sexM -0.01 0.04 -0.09 0.06 1.00 2093 2899
y_num:acf_perioda.preMacf:age06_15:sexM 0.00 0.05 -0.11 0.10 1.00 1861 2659
y_num:acf_periodb.acf:age06_15:sexM 0.04 0.14 -0.23 0.31 1.00 5277 2941
y_num:acf_periodc.postMacf:age06_15:sexM 0.10 0.06 -0.02 0.21 1.00 3700 3141
y_num:acf_perioda.preMacf:age16_25:sexM -0.00 0.05 -0.09 0.09 1.00 1619 2848
y_num:acf_periodb.acf:age16_25:sexM 0.06 0.14 -0.22 0.35 1.00 5687 2961
y_num:acf_periodc.postMacf:age16_25:sexM -0.01 0.05 -0.11 0.09 1.00 2988 3037
y_num:acf_perioda.preMacf:age26_35:sexM -0.01 0.05 -0.10 0.08 1.00 1625 2712
y_num:acf_periodb.acf:age26_35:sexM 0.05 0.14 -0.22 0.33 1.00 5222 2964
y_num:acf_periodc.postMacf:age26_35:sexM -0.01 0.05 -0.10 0.09 1.00 2960 2838
y_num:acf_perioda.preMacf:age36_45:sexM -0.01 0.05 -0.11 0.08 1.00 1667 2795
y_num:acf_periodb.acf:age36_45:sexM 0.00 0.13 -0.26 0.26 1.00 5498 3062
y_num:acf_periodc.postMacf:age36_45:sexM -0.00 0.05 -0.10 0.10 1.00 2981 2638
y_num:acf_perioda.preMacf:age46_55:sexM -0.01 0.05 -0.11 0.10 1.00 1749 2303
y_num:acf_periodb.acf:age46_55:sexM -0.00 0.14 -0.29 0.28 1.00 5539 2971
y_num:acf_periodc.postMacf:age46_55:sexM -0.06 0.05 -0.17 0.04 1.00 2851 2957
y_num:acf_perioda.preMacf:age56_65:sexM 0.05 0.06 -0.06 0.16 1.00 1827 2578
y_num:acf_periodb.acf:age56_65:sexM 0.07 0.15 -0.22 0.36 1.00 5489 2712
y_num:acf_periodc.postMacf:age56_65:sexM 0.02 0.05 -0.09 0.12 1.00 3411 3130
y_num:acf_perioda.preMacf:age65P:sexM 0.01 0.06 -0.11 0.12 1.00 2108 2432
y_num:acf_periodb.acf:age65P:sexM 0.08 0.14 -0.19 0.36 1.00 6122 3176
y_num:acf_periodc.postMacf:age65P:sexM 0.01 0.06 -0.10 0.12 1.00 3429 2926
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 203.04 67.43 107.48 371.91 1.00 2024 2752
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_age_sex)
pp_check(m_age_sex, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise posterior
#posterior draws, and summarise
age_sex_summary <- mdata_age_sex %>%
select(year, year2, y_num, acf_period, age, sex) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
age_sex_counterfact <-
tibble(year = mdata_age_sex$year,
year2 = mdata_age_sex$year2,
y_num = mdata_age_sex$y_num,
age = mdata_age_sex$age,
sex = mdata_age_sex$sex,
acf_period = factor("a. pre-acf")) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention")) %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female"))
age_sex_summary %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
ggplot() +
geom_ribbon(aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = age_sex_counterfact %>% filter(year>=1956),
aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = age_sex_counterfact %>% filter(year>=1956),
aes(y=.epred, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = mdata_age_sex %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="b. acf" ~ "Counterfactual",
acf_period=="c. post-acf" ~ "Post Intervention"))%>%
mutate(acf_period2 = case_when(acf_period=="Before Intervention" ~ "Pre-ACF",
acf_period=="Counterfactual" ~ "ACF",
acf_period=="Post Intervention" ~ "Post-ACF")),
aes(y=cases, x=year2, shape=fct_relevel(acf_period2,
"Pre-ACF",
"ACF",
"Post-ACF")), size=2) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
facet_grid(fct_relevel(age, "65 & up y", "56 to 65y", "46 to 55y", "36 to 45y", "26 to 35y", "16 to 25y", "06 to 15y", "0 to 5y")~sex,
scales = "free_y") +
theme_ggdist() +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:") +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:") +
scale_shape_discrete(name="Emprical data (period):", na.translate = F) +
labs(
x = "Year",
y = "Case notifications (n)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
legend.box="vertical",
panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s13.tiff"), height=10)
Saving 7.29 x 10 in image
Calculate summary effects
#Peak
out_age_sex_1 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf"))
peak_draws_age_sex <- add_epred_draws(newdata = out_age_sex_1,
object = m_age_sex) %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(.epred)/first(.epred)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
peak_summary_age_sex <- peak_draws_age_sex %>%
group_by(age, sex) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#Level
out_age_sex_2 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf"))
level_draws_age_sex <- add_epred_draws(newdata = out_age_sex_2,
object = m_age_sex) %>%
arrange(y_num, .draw) %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(.epred)/first(.epred)) %>%
ungroup() %>%
mutate(measure = "RR.level")
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
level_summary_age_sex <- level_draws_age_sex %>%
group_by(age, sex) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#Slope
out_age_sex_3 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf"))
slope_draws_age_sex <- add_epred_draws(newdata = out_age_sex_3,
object = m_age_sex) %>%
arrange(y_num) %>%
ungroup() %>%
group_by(.draw, y_num, age, sex) %>%
summarise(slope = last(.epred)/first(.epred)) %>%
ungroup() %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
`summarise()` has grouped output by '.draw', 'y_num', 'age'. You can override using the `.groups` argument.
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
slope_summary_age_sex <- slope_draws_age_sex %>%
group_by(age, sex) %>%
median_qi(estimate) %>%
mutate(measure = "RR.slope")
Numerical summary of these summary results
bind_rows(
peak_summary_age_sex, level_summary_age_sex, slope_summary_age_sex
) %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
As a figure
peak_g_age_sex <- peak_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#level plot
level_g_age_sex <- level_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#slope plot
slope_g_age_sex <- slope_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
counterfact_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(year, age, sex, .draw, .epred_counterf = .epred)
Adding missing grouping variables: `year2`, `y_num`, `acf_period`, `.row`
#Calcuate predicted number of cases per draw, then summarise.
post_change_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, age, sex, .draw, .epred)
#for the overall period
counterfact_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(age, sex, .draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row`
`summarise()` has grouped output by 'age', 'sex'. You can override using the `.groups` argument.
#Calcuate incidence per draw, then summarise.
post_change_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(.draw, age, sex) %>%
summarise(.epred = sum(.epred))
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row`
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
counter_post_overall_age_sex <-
left_join(counterfact_overall_age_sex, post_change_overall_age_sex) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by(age, sex) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
mutate(year = "Overall (1958-1963)")
Joining with `by = join_by(age, sex, .draw)`
age_sex_txt <- counter_post_overall_age_sex %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
transmute(year = as.character(year),
sex = sex,
age = age,
cases_averted = glue::glue("{cases_averted}\n({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change}\n({pct_change.lower} to {pct_change.upper})"))
age_sex_txt %>% datatable()
NA
NA
counterfactual_g_age_sex <- counter_post_overall_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(x = age, y=cases_averted, ymin=cases_averted.lower, ymax=cases_averted.upper, colour=sex, shape=sex), position=position_dodge(width=0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
scale_y_continuous(labels = comma) +
labs(x="",
y="Number (95% UI)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "bottom")
counterfactual_g_age_sex
Join together for Figure 3.
(peak_g_age_sex + level_g_age_sex) / (slope_g_age_sex + counterfactual_g_age_sex) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect") & theme(legend.position = "bottom")
ggsave(here("figures/f3.tiff"), width = 12, height=8)
NA
NA